1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)

op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material and Methods

Data from the speech features

1.2 The Data


pd_speech_features <- as.data.frame(read_excel("~/GitHub/FCA/Data/pd_speech_features.xlsx",sheet = "pd_speech_features", range = "A2:ACB758"))

1.2.1 The Average of the Three Repetitions

Each subject had three repeated observations. Here I’ll use the average of the three experiments per subject.

rep1Parkison <- subset(pd_speech_features,RID==1)
rownames(rep1Parkison) <- rep1Parkison$id
rep1Parkison$id <- NULL
rep1Parkison$RID <- NULL
rep1Parkison[,1:ncol(rep1Parkison)] <- sapply(rep1Parkison,as.numeric)

rep2Parkison <- subset(pd_speech_features,RID==2)
rownames(rep2Parkison) <- rep2Parkison$id
rep2Parkison$id <- NULL
rep2Parkison$RID <- NULL
rep2Parkison[,1:ncol(rep2Parkison)] <- sapply(rep2Parkison,as.numeric)

rep3Parkison <- subset(pd_speech_features,RID==3)
rownames(rep3Parkison) <- rep3Parkison$id
rep3Parkison$id <- NULL
rep3Parkison$RID <- NULL
rep3Parkison[,1:ncol(rep3Parkison)] <- sapply(rep3Parkison,as.numeric)

whof <- !(colnames(rep1Parkison) %in% c("gender","class"));
avgParkison <- rep1Parkison;
avgParkison[,whof] <- (rep1Parkison[,whof] + rep2Parkison[,whof] + rep3Parkison[,whof])/3


signedlog <- function(x) { return (sign(x)*log(abs(1.0e12*x)+1.0))}
whof <- !(colnames(avgParkison) %in% c("gender","class"));
avgParkison[,whof] <- signedlog(avgParkison[,whof])

1.2.1.1 Standarize the names for the reporting

studyName <- "Parkinsons"
dataframe <- avgParkison
outcome <- "class"

TopVariables <- 10

thro <- 0.80
cexheat = 0.15

1.3 Generaring the report

1.3.1 Libraries

Some libraries

library(psych)
library(whitening)
library("vioplot")
library("rpart")

1.3.2 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
252 753
pander::pander(table(dataframe[,outcome]))
0 1
64 188

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

largeSet <- length(varlist) > 1500 

1.3.3 Scaling the data

Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns


  ### Some global cleaning
  sdiszero <- apply(dataframe,2,sd) > 1.0e-16
  dataframe <- dataframe[,sdiszero]

  varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
  tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
  dataframe <- dataframe[,tokeep]

  varlist <- colnames(dataframe)
  varlist <- varlist[varlist != outcome]
  
  iscontinous <- sapply(apply(dataframe,2,unique),length) > 5 ## Only variables with enough samples



dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData

1.4 The heatmap of the data

numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000


if (!largeSet)
{

  hm <- heatMaps(data=dataframeScaled[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 xlab="Feature",
                 ylab="Sample",
                 srtCol=45,
                 srtRow=45,
                 cexCol=cexheat,
                 cexRow=cexheat
                 )
  par(op)
}

1.4.0.1 Correlation Matrix of the Data

The heat map of the data


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  #cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
  cormat <- cor(dataframe[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.9999951

1.5 The decorrelation


DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#> 
#>  Included: 744 , Uni p: 0.01657441 , Uncorrelated Base: 192 , Outcome-Driven Size: 0 , Base Size: 192 
#> 
#> 
 1 <R=1.000,r=0.975,N=  363>, Top: 78( 2 )[ 1 : 78 Fa= 77 : 0.975 ]( 77 , 204 , 0 ),<|>Tot Used: 281 , Added: 204 , Zero Std: 0 , Max Cor: 1.000
#> 
 2 <R=1.000,r=0.975,N=  363>, Top: 20( 4 )[ 1 : 20 Fa= 96 : 0.975 ]( 20 , 59 , 77 ),<|>Tot Used: 334 , Added: 59 , Zero Std: 0 , Max Cor: 1.000
#> 
 3 <R=1.000,r=0.975,N=  363>, Top: 12( 1 )[ 1 : 12 Fa= 108 : 0.975 ]( 12 , 21 , 96 ),<|>Tot Used: 349 , Added: 21 , Zero Std: 0 , Max Cor: 1.000
#> 
 4 <R=1.000,r=0.950,N=  195>, Top: 73( 5 )[ 1 : 73 Fa= 144 : 0.950 ]( 72 , 94 , 108 ),<|>Tot Used: 417 , Added: 94 , Zero Std: 0 , Max Cor: 0.991
#> 
 5 <R=0.991,r=0.945,N=  195>, Top: 23( 1 )[ 1 : 23 Fa= 153 : 0.945 ]( 23 , 27 , 144 ),<|>Tot Used: 426 , Added: 27 , Zero Std: 0 , Max Cor: 0.965
#> 
 6 <R=0.965,r=0.932,N=  195>, Top: 30( 1 )[ 1 : 30 Fa= 161 : 0.932 ]( 30 , 37 , 153 ),<|>Tot Used: 442 , Added: 37 , Zero Std: 0 , Max Cor: 0.950
#> 
 7 <R=0.950,r=0.925,N=  195>, Top: 13( 1 )[ 1 : 13 Fa= 166 : 0.925 ]( 13 , 13 , 161 ),<|>Tot Used: 448 , Added: 13 , Zero Std: 0 , Max Cor: 0.924
#> 
 8 <R=0.924,r=0.862,N=  173>, Top: 63( 2 )[ 1 : 63 Fa= 189 : 0.862 ]( 62 , 85 , 166 ),<|>Tot Used: 478 , Added: 85 , Zero Std: 0 , Max Cor: 0.981
#> 
 9 <R=0.981,r=0.891,N=  173>, Top: 7( 1 )[ 1 : 7 Fa= 192 : 0.891 ]( 7 , 7 , 189 ),<|>Tot Used: 481 , Added: 7 , Zero Std: 0 , Max Cor: 0.890
#> 
 10 <R=0.890,r=0.800,N=  180>, Top: 62( 6 )[ 1 : 62 Fa= 210 : 0.800 ]( 59 , 91 , 192 ),<|>Tot Used: 507 , Added: 91 , Zero Std: 0 , Max Cor: 0.930
#> 
 11 <R=0.930,r=0.815,N=  180>, Top: 12( 1 )[ 1 : 12 Fa= 215 : 0.815 ]( 12 , 14 , 210 ),<|>Tot Used: 511 , Added: 14 , Zero Std: 0 , Max Cor: 0.914
#> 
 12 <R=0.914,r=0.800,N=    9>, Top: 4( 1 )[ 1 : 4 Fa= 216 : 0.800 ]( 4 , 5 , 215 ),<|>Tot Used: 511 , Added: 5 , Zero Std: 0 , Max Cor: 0.799
#> 
 13 <R=0.799,r=0.800,N=    9>
#> 
 [ 13 ], 0.7994489 Decor Dimension: 511 Nused: 511 . Cor to Base: 133 , ABase: 11 , Outcome Base: 0 
#> 
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]

pander::pander(sum(apply(dataframe[,varlist],2,var)))

57178

pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))

55983

pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))

4.68

pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))

2.45

1.5.1 The decorrelation matrix


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  
  UPSTM <- attr(DEdataframe,"UPSTM")
  
  gplots::heatmap.2(1.0*(abs(UPSTM)>0),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Decorrelation matrix",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="|Beta|>0",
                    xlab="Output Feature", ylab="Input Feature")
  
  par(op)
}

1.6 The heatmap of the decorrelated data

if (!largeSet)
{

  hm <- heatMaps(data=DEdataframe[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 cexRow = cexheat,
                 cexCol = cexheat,
                 srtCol=45,
                 srtRow=45,
                 xlab="Feature",
                 ylab="Sample")
  par(op)
}

1.7 The correlation matrix after decorrelation

if (!largeSet)
{

  cormat <- cor(DEdataframe[,varlistc],method="pearson")
  cormat[is.na(cormat)] <- 0
  
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Correlation after IDeA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  
  par(op)
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.7994489

1.8 U-MAP Visualization of features

1.8.1 The UMAP based on LASSO on Raw Data


if (nrow(dataframe) < 1000)
{
  classes <- unique(dataframe[1:numsub,outcome])
  raincolors <- rainbow(length(classes))
  names(raincolors) <- classes
  datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
  text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}

1.8.2 The decorralted UMAP

if (nrow(dataframe) < 1000)
{

  datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
  text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}

1.9 Univariate Analysis

1.9.1 Univariate



univarRAW <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               dataframe,
               rankingTest="AUC")

100 : std_MFCC_2nd_coef 200 : app_entropy_log_3_coef 300 : app_LT_TKEO_mean_7_coef 400 : tqwt_entropy_log_dec_15 500 : tqwt_medianValue_dec_7
600 : tqwt_stdValue_dec_35 700 : tqwt_skewnessValue_dec_27




univarDe <- uniRankVar(varlistc,
               paste(outcome,"~1"),
               outcome,
               DEdataframe,
               rankingTest="AUC",
               )

100 : std_MFCC_2nd_coef 200 : La_app_entropy_log_3_coef 300 : La_app_LT_TKEO_mean_7_coef 400 : La_tqwt_entropy_log_dec_15 500 : tqwt_medianValue_dec_7
600 : La_tqwt_stdValue_dec_35 700 : tqwt_skewnessValue_dec_27

1.9.2 Final Table


univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")

##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
std_delta_delta_log_energy 23.4 0.469 22.8 0.461 0.653 0.798
std_delta_log_energy 24.3 0.477 23.8 0.441 0.634 0.794
std_9th_delta_delta 23.6 0.242 23.4 0.171 0.746 0.787
std_8th_delta_delta 23.7 0.240 23.4 0.150 0.725 0.780
std_7th_delta_delta 23.7 0.261 23.5 0.188 0.931 0.776
tqwt_entropy_log_dec_12 -39.6 0.239 -39.4 0.240 0.887 0.770
std_6th_delta_delta 23.8 0.277 23.5 0.172 0.945 0.768
std_8th_delta 24.4 0.245 24.2 0.163 0.981 0.767
std_9th_delta 24.4 0.249 24.1 0.185 0.398 0.764
tqwt_entropy_shannon_dec_12 30.3 1.993 32.1 1.703 0.196 0.763


topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]

theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]

pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
std_delta_log_energy 24.335 0.477 23.810 0.441 6.34e-01 0.794
std_8th_delta_delta 23.660 0.240 23.428 0.150 7.25e-01 0.780
tqwt_entropy_log_dec_12 -39.634 0.239 -39.390 0.240 8.87e-01 0.770
La_tqwt_entropy_log_dec_28 -0.633 0.430 -0.819 0.273 1.25e-07 0.758
La_std_2nd_delta 0.462 0.132 0.329 0.144 7.54e-01 0.754
mean_MFCC_2nd_coef 21.360 18.112 1.716 27.881 4.61e-07 0.753
La_tqwt_energy_dec_33 0.745 0.372 1.217 0.680 8.01e-01 0.736
La_tqwt_kurtosisValue_dec_33 6.360 0.407 5.975 0.553 1.62e-01 0.736
tqwt_kurtosisValue_dec_18 28.598 0.288 28.395 0.144 9.92e-01 0.734
La_apq11Shimmer 2.150 0.161 2.031 0.133 4.19e-01 0.734

dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")

theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))


theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)

pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
mean total fraction
2.57 469 0.63


allSigvars <- names(dc)



dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
  coef <- theFormulas[[dx]]
  cname <- names(theFormulas[[dx]])
  names(cname) <- cname
  for (cf in names(coef))
  {
    if (cf != dx)
    {
      if (coef[cf]>0)
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
      }
      else
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("%5.3f*%s",coef[cf],cname[cf]))
      }
    }
  }
}

finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])


orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")

finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
  DecorFormula caseMean caseStd controlMean controlStd controlKSP ROCAUC RAWAUC fscores
std_delta_log_energy 24.335 0.477 23.810 0.441 6.34e-01 0.794 0.794 2
std_8th_delta_delta 23.660 0.240 23.428 0.150 7.25e-01 0.780 0.780 6
tqwt_entropy_log_dec_12 -39.634 0.239 -39.390 0.240 8.87e-01 0.770 0.770 NA
La_tqwt_entropy_log_dec_28 + 1.000tqwt_entropy_log_dec_28 -0.981tqwt_entropy_log_dec_29 -0.633 0.430 -0.819 0.273 1.25e-07 0.758 0.654 -1
La_std_2nd_delta -0.907std_MFCC_2nd_coef + 1.000std_2nd_delta 0.462 0.132 0.329 0.144 7.54e-01 0.754 0.630 0
mean_MFCC_2nd_coef 21.360 18.112 1.716 27.881 4.61e-07 0.753 0.753 NA
La_tqwt_energy_dec_33 -0.919tqwt_energy_dec_32 + 1.000tqwt_energy_dec_33 0.745 0.372 1.217 0.680 8.01e-01 0.736 0.509 1
La_tqwt_kurtosisValue_dec_33 -0.788tqwt_kurtosisValue_dec_31 + 1.000tqwt_kurtosisValue_dec_33 6.360 0.407 5.975 0.553 1.62e-01 0.736 0.628 -1
tqwt_kurtosisValue_dec_18 28.598 0.288 28.395 0.144 9.92e-01 0.734 0.734 3
La_apq11Shimmer -0.907locShimmer + 1.000apq11Shimmer 2.150 0.161 2.031 0.133 4.19e-01 0.734 0.713 -1
apq11Shimmer NA 24.713 0.452 24.313 0.543 7.01e-01 0.713 0.713 NA
locShimmer NA 24.873 0.487 24.564 0.583 9.78e-01 0.663 0.663 4
tqwt_entropy_log_dec_28 NA -36.009 5.040 -36.613 0.482 3.82e-03 0.654 0.654 NA
std_2nd_delta NA 24.798 0.314 24.656 0.285 4.43e-01 0.630 0.630 NA
tqwt_kurtosisValue_dec_33 NA 29.796 0.904 29.434 0.819 5.74e-02 0.628 0.628 NA
tqwt_entropy_log_dec_29 NA -36.051 5.123 -36.477 0.317 9.23e-03 0.565 0.565 NA
tqwt_energy_dec_32 NA 18.264 1.945 18.116 2.434 2.91e-01 0.546 0.546 3
tqwt_energy_dec_33 NA 17.527 1.794 17.864 2.429 2.00e-01 0.509 0.509 NA
std_MFCC_2nd_coef NA 26.823 0.313 26.812 0.280 7.85e-01 0.508 0.508 1
tqwt_kurtosisValue_dec_31 NA 29.740 0.943 29.769 1.020 1.36e-01 0.490 0.490 3

1.10 Comparing IDeA vs PCA vs EFA

1.10.1 PCA

featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,tol=0.002)   #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous]) 
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

#pander::pander(pc$rotation)


PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])


  gplots::heatmap.2(abs(PCACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "PCA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.10.2 EFA


EFAdataframe <- dataframeScaled

if (length(iscontinous) < 2000)
{
  topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
  if (topred < 2) topred <- 2
  
  uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE)  # EFA analysis
  predEFA <- predict(uls,dataframeScaled[,iscontinous])
  EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous]) 


  
  EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
  
  
    gplots::heatmap.2(abs(EFACor),
                      trace = "none",
    #                  scale = "row",
                      mar = c(5,5),
                      col=rev(heat.colors(5)),
                      main = "EFA Correlation",
                      cexRow = 0.5,
                      cexCol = 0.5,
                       srtCol=45,
                       srtRow= -45,
                      key.title=NA,
                      key.xlab="Pearson Correlation",
                      xlab="Feature", ylab="Feature")
}

1.11 Effect on CAR modeling

par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(rawmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
  }


pander::pander(table(dataframe[,outcome],pr))
  0 1
0 39 25
1 3 185
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.8333 0.78147 0.8772
    tp 0.7460 0.68760 0.7986
    se 0.9840 0.95408 0.9967
    sp 0.6094 0.47932 0.7290
    diag.ac 0.8889 0.84343 0.9249
    diag.or 96.2000 27.66232 334.5503
    nndx 1.6852 1.37805 2.3074
    youden 0.5934 0.43339 0.7257
    pv.pos 0.8810 0.82929 0.9215
    pv.neg 0.9286 0.80517 0.9850
    lr.pos 2.5191 1.85407 3.4228
    lr.neg 0.0262 0.00838 0.0818
    p.rout 0.1667 0.12284 0.2185
    p.rin 0.8333 0.78147 0.8772
    p.tpdn 0.3906 0.27104 0.5207
    p.tndp 0.0160 0.00330 0.0459
    p.dntp 0.1190 0.07854 0.1707
    p.dptn 0.0714 0.01498 0.1948
  • tab:

      Outcome + Outcome - Total
    Test + 185 25 210
    Test - 3 39 42
    Total 188 64 252
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.889 0.843 0.925
3 se 0.984 0.954 0.997
4 sp 0.609 0.479 0.729
6 diag.or 96.200 27.662 334.550

par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(IDeAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
  }

pander::pander(table(DEdataframe[,outcome],pr))
  0 1
0 46 18
1 6 182
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.7937 0.7384 0.8419
    tp 0.7460 0.6876 0.7986
    se 0.9681 0.9318 0.9882
    sp 0.7188 0.5924 0.8240
    diag.ac 0.9048 0.8616 0.9380
    diag.or 77.5185 29.1251 206.3207
    nndx 1.4560 1.2312 1.9076
    youden 0.6868 0.5242 0.8122
    pv.pos 0.9100 0.8615 0.9458
    pv.neg 0.8846 0.7656 0.9565
    lr.pos 3.4421 2.3246 5.0967
    lr.neg 0.0444 0.0199 0.0990
    p.rout 0.2063 0.1581 0.2616
    p.rin 0.7937 0.7384 0.8419
    p.tpdn 0.2812 0.1760 0.4076
    p.tndp 0.0319 0.0118 0.0682
    p.dntp 0.0900 0.0542 0.1385
    p.dptn 0.1154 0.0435 0.2344
  • tab:

      Outcome + Outcome - Total
    Test + 182 18 200
    Test - 6 46 52
    Total 188 64 252
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.905 0.862 0.938
3 se 0.968 0.932 0.988
4 sp 0.719 0.592 0.824
6 diag.or 77.519 29.125 206.321

par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
  plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
  text(PCAmodel, use.n = TRUE,cex=0.75)
  ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
  0 1
0 31 33
1 7 181
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.8492 0.7989 0.8910
    tp 0.7460 0.6876 0.7986
    se 0.9628 0.9248 0.9849
    sp 0.4844 0.3575 0.6127
    diag.ac 0.8413 0.7902 0.8841
    diag.or 24.2900 9.8738 59.7548
    nndx 2.2364 1.6732 3.5424
    youden 0.4471 0.2823 0.5976
    pv.pos 0.8458 0.7903 0.8914
    pv.neg 0.8158 0.6567 0.9226
    lr.pos 1.8672 1.4701 2.3716
    lr.neg 0.0769 0.0356 0.1660
    p.rout 0.1508 0.1090 0.2011
    p.rin 0.8492 0.7989 0.8910
    p.tpdn 0.5156 0.3873 0.6425
    p.tndp 0.0372 0.0151 0.0752
    p.dntp 0.1542 0.1086 0.2097
    p.dptn 0.1842 0.0774 0.3433
  • tab:

      Outcome + Outcome - Total
    Test + 181 33 214
    Test - 7 31 38
    Total 188 64 252
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.841 0.790 0.884
3 se 0.963 0.925 0.985
4 sp 0.484 0.358 0.613
6 diag.or 24.290 9.874 59.755


par(op)

1.11.1 EFA


  EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
  EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
  pr <- predict(EFAmodel,EFAdataframe,type = "class")
  
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(EFAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
  }


  pander::pander(table(EFAdataframe[,outcome],pr))
  0 1
0 45 19
1 12 176
  pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.7738 0.7171 0.824
    tp 0.7460 0.6876 0.799
    se 0.9362 0.8912 0.967
    sp 0.7031 0.5758 0.811
    diag.ac 0.8770 0.8300 0.915
    diag.or 34.7368 15.7115 76.800
    nndx 1.5642 1.2862 2.141
    youden 0.6393 0.4670 0.777
    pv.pos 0.9026 0.8520 0.940
    pv.neg 0.7895 0.6611 0.886
    lr.pos 3.1534 2.1589 4.606
    lr.neg 0.0908 0.0513 0.161
    p.rout 0.2262 0.1760 0.283
    p.rin 0.7738 0.7171 0.824
    p.tpdn 0.2969 0.1891 0.424
    p.tndp 0.0638 0.0334 0.109
    p.dntp 0.0974 0.0597 0.148
    p.dptn 0.2105 0.1138 0.339
  • tab:

      Outcome + Outcome - Total
    Test + 176 19 195
    Test - 12 45 57
    Total 188 64 252
  • method: exact

  • digits: 2

  • conf.level: 0.95

  pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.877 0.830 0.915
3 se 0.936 0.891 0.967
4 sp 0.703 0.576 0.811
6 diag.or 34.737 15.712 76.800
  par(op)